****************************************************************************************************
****************************************************************************************************
* PURPOSE: 
* - CREATE AGGREGATED VARIABLES FOR REGRESSIONS ACROSS THE DISTRIBUTION OF WAGES AS IN DUSTMANN ETAL 2013
*
* LAST MODIFIED: JULY, 2025
*****************************************************************************************************

clear all 

* INSTALL SCHEMES FOR GRAPHS
*ssc install blindschemes, replace all 
*ssc install schemepack, replace /*based on https://github.com/asjadnaqvi/Stata-schemes*/

**# DIRECTORIES

* PAPER DIRECTORY DROPBOX
global fiscal "/Users/andres/Library/CloudStorage/Dropbox/2 Papers/2023/Paper_Fiscal_Costs"

* SUBDIRECTORIES
global data "$fiscal/1 Data/12 Final Datasets"
global programs "$fiscal/2 Programs/2 Figures and Tables paper"
global results "$fiscal/3 Results"

**# LOAD DATASET AND CREATE NEW VARIABLES

* LOAD DATASET WITH VARIABLES NEEDED
use "$data/LaborEffects.dta", clear

**#SAMPLE SELECTION 

* ONLY METROPOLITAN AREAS 
drop if area >= 81 /* not observations for these areas*/
drop if area == 25 /* not observations */
keep if area !=. 

* DUMMIES FOR YEAR AND MSA
tab year, gen(year)
tab area, gen(area)

* KEEP ONLY WORKING-AGE POPULATION
drop if (age<15|age>64) 

* ACTIVE IN THE LABOR FORCE
keep if labforce == 1 

* DELIMIT THE ANALYSIS FOR THE YEARS 2013-2018. 
drop if year > 2018

* DISPLAY FORMAT FOR SAMPLING WEIGHTS 
format %12.0f fex12

* GENERATE ROUNDED WEIGHTS FOR SPECIFIC COMMANDS THAT REQUIRE THEM 
gen fex12_round = fex12
replace fex12_round = round(fex12_round) 

* DUMMIES FOR IMMIGRANTS FROM VENEZUELA 
tab immig, gen(immig)

* NEW CATEGORICAL VARIABLE FOR RELEVANT CATEGORIES
gen immi3cat = 0 
* MIGRANTS FROM VENEZUELA
replace immi3cat = 1 if immig2 == 1
replace immi3cat = 1 if immig3 == 1
replace immi3cat = 1 if immig4 == 1
replace immi3cat = 1 if immig5 == 1
* MIGRANTS FROM OTHER COUNTRIES
replace immi3cat = 2 if immig6 == 1

* DUMMIES FOR IMMIGRATION CATEGORIES
tab immi3cat, gen(immi3cat)

* DUMMIES FOR SKILL CATEGORIES
tab skill_group, gen(skill_group)

* COLLAPSE TO GET AVERAGE AGE AND COUNTS PER SKILL BY MIGRANT CATEGORY, AREA, AND YEAR
collapse (sum) skill_group1 skill_group2 skill_group3 ///
 (mean) age  [pw = fex12], by(immi3cat year area)

* GEN RATIO HIGH TO LOW 
gen ln_highTolow = ln((skill_group3 / skill_group1)*100)
summ ln_highTolow, detail 
 
* GEN RATIO INTERMEDIATE TO LOW 
gen ln_interTolow = ln((skill_group2 / skill_group1)*100)
summ ln_interTolow, detail 

drop skill_group1 skill_group2 skill_group3

* RESHAPE TO GET DIFFERENT VARIABLES
reshape wide age ln_highTolow ln_interTolow, i(year area) j(immi3cat) 

* KEEP RELEVANT VARIABLES OF INTEREST 
keep year area age0 ln_highTolow0 ln_interTolow0 age1

rename age0 MeanAgeNatives
rename ln_highTolow0  ln_highTolowNatives
rename ln_interTolow0 ln_interTolowNatives
rename age1 MeanAgeImmFromVenz

label var MeanAgeNatives "Mean Age Natives"
label var ln_highTolowNatives "ln ratio of hight to low-educated native workers"
label var ln_interTolowNatives "ln ratio of intermediate to low-educated native workers"
label var MeanAgeImmFromVenz "Mean Age Immigrants from Venezuela"

order MeanAgeImmFromVenz, after(MeanAgeNatives)


